*****************************************************************
* Replication directory for                                   ***
* Prime locations                                             ***
* by Gabriel M. Ahlfeldt, Thilo N.H. Albers, Kristian Behrens ***
* Published in American Economic Review: Insights             ***
*****************************************************************
* 01/2025
* Stata
version 17.0

* This script analyses block group data to provide gradient estimates 
* and measures of prime location accessibility

* Read distances from block group data to PLs
	import delimited "$temp/GISUS_using/BlockGroup2PLdist.csv", clear
	
* Merge block group data
	merge 1:1 geoid using "$data_USMETROS/BlockData/block_group_data.dta"
	drop _m
	
* Merge prime location information
	ren plid_us PLID_US
	merge m:1 PLID_US using "$temp/US-CBSAs/p${SL}/PL-data.dta", keepusing(PLrankByCity PLrankUS PL_emp metro_emp PL CBD cbsafp PL_TSempshare)
	drop _m
	
* Rescale distance variables to be measured in km
	replace distpl = distpl/1000
	
* Value per housing unit
	gen h_val_unit = h_val_total / housing_units
	gen pop_dens = population / aland10
	gen housing_dens = housing / aland10
	
* Take gradient variables into logs
	local varlist pop_dens housing_dens med_hh_income med_contract_rent   h_val_unit
	foreach var of varlist `varlist' {
		gen l`var'=ln(`var')
	}	
	
* Collapse to ditance bin x prime location catchment area level
	preserve
	drop if distpl > 50	// Restrict to places within 50 km of nearest prime location
	gen distplbin = int(distpl)
	collapse lpop_dens lhousing_dens lmed_hh_income lmed_contract_rent  lh_val_unit metro_emp PLrankByCity PL_TSempshare PL_emp, by(distplbin PLID_US)
	gen ldistplbin = ln(distplbin)
	
* label variables
	label var distpl "Distance from PL (km)" 
	label var ldistpl "Ln distance from PL" 
	label var lpop_dens "Ln population density"
	label var lhousing_dens "ln housing density"
	label var lmed_hh_income "ln median household income" 
	label var lmed_contract_rent "ln median rent" 
	label var lh_val_unit "ln mean housing value" 
	
	foreach var in `varlist' {
		eststo: reg l`var' distpl, abs(PLID_US) cluster(PLID_US)
	}	
* Write Appendix Table B.2.6, panel a	
	capture mkdir "$tables_App/US-CBSAs"
	esttab  using "$tables_App/US-CBSAs/TAB_B2_6a_Gradients_A_all.tex", replace b(3) se(4) label compress   r2(3) ///
			stats(MSAFE N r2 , fmt(%18.3g )   labels("MSA effects"  `"Observations"' `"\(R^{2}\)"' ))  ///   stats(Country_effects IV N r2) 0.* "`City'"
			title("Prime location gradients in US MSAs") modelwidth(6) nostar  addnote( "Unit of observation block group. Standard errors clustered on MSAs." )	
			eststo clear
			
	foreach var in `varlist' {
		eststo: reg l`var' distpl if metro_emp >= 1000, abs(PLID_US) cluster(PLID_US)
	}
* Write Appendix Table B.2.6, panel b		
	esttab  using "$tables_App/US-CBSAs/TAB_B2_6b_Gradients_B_largeMSAs.tex", replace b(3) se(4) label compress   r2(3) ///
			stats(MSAFE N r2 , fmt(%18.3g )   labels("MSA effects"  `"Observations"' `"\(R^{2}\)"' ))  ///   stats(Country_effects IV N r2) 0.* "`City'"
			title("Prime location gradients in US MSAs") modelwidth(6) nostar  addnote( "Unit of observation block group. Standard errors clustered on MSAs." )	
			eststo clear
			
	foreach var in `varlist' {
		eststo: reg l`var' distpl if metro_emp >= 1000 & PLrankByCity == 1, abs(PLID_US) cluster(PLID_US)
	}	
* Write Appendix Table B.2.6, panel c	
	esttab  using "$tables_App/US-CBSAs/TAB_B2_6c_Gradients_C_primary.tex", replace b(3) se(4) label compress   r2(3) ///
			stats(MSAFE N r2 , fmt(%18.3g )   labels("MSA effects"  `"Observations"' `"\(R^{2}\)"' ))  ///   stats(Country_effects IV N r2) 0.* "`City'"
			title("Prime location gradients in US MSAs") modelwidth(6) nostar  addnote( "Unit of observation block group. Standard errors clustered on MSAs." )	
			eststo clear
			
	foreach var in `varlist' {
		eststo: reg l`var' distpl if metro_emp >= 1000 & PLrankByCity > 1, abs(PLID_US) cluster(PLID_US)
		qui estadd local MSAFE "Yes"	
	}
* Write Appendix Table B.2.6, panel d
	esttab  using "$tables_App/US-CBSAs/TAB_B2_6d_Gradients_D_secondary.tex", replace b(3) se(4) label compress   r2(3) ///
			stats(MSAFE N r2 , fmt(%18.3g )   labels("MSA effects"  `"Observations"' `"\(R^{2}\)"' ))  ///   stats(Country_effects IV N r2) 0.* "`City'"
			title("Prime location gradients in US MSAs") modelwidth(6) nostar  addnote( "Unit of observation block group. Standard errors clustered on MSAs." )	
			eststo clear
	restore		
	
* Prepare data for further analysis ********************************************

* Compute population within 20km of prime locations
	foreach val of numlist 5 10 15 20 25 50 {
		qui gen pop`val'kmPL = (distpl <= `val')*population
		qui egen MSApop`val'kmPL = sum(pop`val'kmPL), by(cbsafp)	
		capture egen MSApop = sum(population), by(cbsafp)
		qui gen MSApopshare`val'kmPL = MSApop`val'kmPL / MSApop *100
		qui sum pop`val'kmPL
		local popd = r(sum)
		qui sum population
		local pop = r(sum)
		local popshare = round(`popd'/`pop'*100,1)
		display "Population share within `val' km of nearest prime locations: `popshare' %"
	}
	label var MSApopshare20kmPL "Share of population within 20 km of prime location"
	preserve
	keep cbsafp MSApopshare20kmPL 
	duplicates drop
	save "$temp/GISUS_using/MSApop20kmPL", replace
	restore 
* Compute average travel time by MSA	
	gen test = tt_total - tt_less_5 - tt_5_9 -tt_10_14- tt_15_19 -tt_20_24- tt_25_29 -tt_30_34 -tt_35_39- tt_40_44 -tt_45_59 -tt_60_89 -tt_more_90 // test passed, all zeros
	ren tt_less_5 tt_0_5
	ren tt_more_90 tt_90_plus
	gen tt_average = 0 
	foreach num of numlist 0(5)40 {
		replace tt_average = tt_average + tt_`num'_ / tt_total * (`num'+2.5)
	}
	replace tt_average = tt_average + tt_45_59 / tt_total * 52.5
	replace tt_average = tt_average + tt_60_89 / tt_total * 75
	replace tt_average = tt_average + tt_90_plus / tt_total * 100
	
	preserve
	collapse (mean) MSA_tt_average = tt_average [w=population], by(cbsafp)
	label var MSA_tt_average "Average commuting time (minutes)"
	save "$temp/GISUS_using/MSAttav", replace
	restore 
	
* Compute population within 20km by PLID_US
	preserve
	keep if distpl <= 20
	keep PLID_US population
	collapse (sum) PL_pop20km = population , by(PLID_US)
	save "$temp/GISUS_using/PLpop20km", replace
	restore
	
* Script ends